This document demonstrates the use of the bRF and LASSO-D3S functions for integrative GRN inference.

Those functions infer the regulatory pathways of Arabidopsis thaliana’s roots in response to nitrate (N) induction from Varala et al., 2018.

They use as inputs the expression profiles of N-responsive genes and TFBS information. Prior TFBS information was built by searching in the promoters of the N-responsive genes the PWM of the N-responsive regulators.

Data import

Import of the expression data and the N-responsive genes and regulators :

load('rdata/inference_input_N_response_varala.rdata')
genes <- input_data$grouped_genes; length(genes)
## [1] 1426
tfs <- input_data$grouped_regressors; length(tfs)
## [1] 201
counts <- input_data$counts; dim(counts)
## [1] 1426   45
load("rdata/pwm_occurrences_N_response_varala.rdata")
dim(pwm_occurrence)
## [1] 1426  201
# values of mse per gene per alpha per rep for true and shuffled data
load("results/brf_perumtations_mse_all_genes.rdata")
lmses_rf <- lmses
load("results/lasso_perumtations_mse_all_genes.rdata")
lmses_lasso <- lmses
rm(lmses)

# groups of genes depending on their behavior toward data integration
load("results/clusters_mse_bRF_100permutations.rdata")
positive_genes_rf <- names(clusters_rf[clusters_rf==2])

load("results/clusters_mse_lasso_100permutations.rdata")
positive_genes_lasso <- names(clusters_lasso[clusters_lasso==1])

Get optimal alphas per gene

This chunk of code returns the value of alpha where there is a maximal difference between true data and shuffled data in the MSE curves.

get_optimal_alphas_per_genes <- function(model = "rf", strategy = "min_mse",
                                         alpha_for_other_genes = 0){
  if(model == "rf"){
    lmses = lmses_rf
    positive_genes = positive_genes_rf
  }
  
  if(model == "lasso"){
    lmses = lmses_lasso
    positive_genes = positive_genes_lasso
  }
data <- lmses %>%
rownames_to_column('gene') %>%
  reshape2::melt()%>%
  separate(variable,
           into = c("alpha", "rep", "MSEtype"),
           sep = " ") %>%
  group_by(gene, alpha, MSEtype) %>%
  mutate(mean_mse = mean(value, na.rm = T),
         sd_mse = sd(value, na.rm = T)) %>%
  dplyr::select(mean_mse, sd_mse, gene, alpha, MSEtype)%>%
  distinct() 

data_true <- filter(data, MSEtype=="true_data")
data_perm <- filter(data, MSEtype=="shuffled")

data_true$mean_mse_perm <- data_perm$mean_mse
data_true$sd_mse_perm <- data_perm$sd_mse

if(strategy == "min_mse"){
  alphas_opt <- data_true %>%
  filter(gene %in% positive_genes) %>%
   mutate(mean_mse_diff = mean_mse) %>%
  group_by(gene) %>%
  slice_min(order_by = mean_mse_diff) %>%
  select(gene, alpha) %>%
  column_to_rownames("gene")
}

  if(strategy == "max_div"){
    alphas_opt <- data_true %>%
    filter(gene %in% positive_genes) %>%
    mutate(mean_mse_diff = (mean_mse-mean_mse_perm)/(sd_mse_perm+sd_mse)) %>%
    group_by(gene) %>%
    slice_min(order_by = mean_mse_diff) %>%
    select(gene, alpha) %>%
    column_to_rownames("gene")
  }
  # named vector,
  # adding a value of alpha for genes that do not 
  # benefit from data integration
  all_alphas_per_genes <- c(setNames(as.numeric(alphas_opt$alpha), 
                               rownames(alphas_opt)), 
                          setNames(rep(alpha_for_other_genes, 
                                       length(setdiff(genes, positive_genes))), 
                                  nm=setdiff(genes, positive_genes)))
  
  return(all_alphas_per_genes)
}

alphas_per_genes = list()
for(model in c("rf", "lasso")){
  for(strat in c("min_mse", "max_div")){
    for(alphas_other in c(0,1)){
      alphas_per_genes[[paste0(model, "_", strat, "_", alphas_other)]] <- 
      get_optimal_alphas_per_genes(model = model, strategy = strat, 
                                   alpha_for_other_genes =  alphas_other)
    }
  }
}

save(alphas_per_genes, file = "results/alphas_per_genes.rdata")

see the optimal alpha in the curves

load("results/alphas_per_genes.rdata")

draw_gene_mean_sd <- function(gene, model="rf", title = NULL){
  if(model == "rf"){
    lmses = lmses_rf
  }
  
  if(model == "lasso"){
    lmses = lmses_lasso
  }
  
  plot <- lmses[gene, ] %>%
    gather() %>%
    separate(key,
             into = c("alpha", "rep", "MSEtype"),
             sep = " ") %>%
    group_by(alpha, MSEtype) %>%
    mutate(mean_mse = mean(value, na.rm = T),
           sd_mse = sd(value, na.rm = T),
           alpha = as.numeric(alpha)) %>%
    ggplot(aes(
      x = as.numeric(alpha),
      y = value,
      color = MSEtype,
      fill = MSEtype
    )) +ggtitle(paste(gene, title))+ylab("MSE/Var(gene)")+
    geom_ribbon(aes(ymin = mean_mse - sd_mse , 
                    ymax = mean_mse + sd_mse  ), 
                alpha = .4)  +theme_pubr(legend = "top")+
    geom_point(alpha = 0.1) + geom_smooth(se=F)+xlab("alpha") 
  if(model == "rf")
    return(plot+
    geom_vline(xintercept = as.numeric(alphas_per_genes$rf_min_mse_0[gene]))+
    geom_vline(xintercept = as.numeric(alphas_per_genes$rf_max_div_0[gene]), color = "darkred"))
  if(model == "lasso")
    return(plot+
    geom_vline(xintercept = as.numeric(alphas_per_genes$lasso_min_mse_0[gene]))+
    geom_vline(xintercept = as.numeric(alphas_per_genes$lasso_max_div_0[gene]), color = "darkred"))
}

for(gene in sample(genes, 20)){
  print(draw_gene_mean_sd(gene, model = "lasso", title = "lasso")+
          draw_gene_mean_sd(gene, model = "rf", title = "rf"))
}

for(gene in "AT1G08090"){
  print(draw_gene_mean_sd(gene, model = "lasso", title = "lasso")+
          draw_gene_mean_sd(gene, model = "rf", title = "rf"))
}

"AT1G08090" %in% positive_genes_rf
## [1] TRUE
nCores = 35
networks <- list()
densities <- c(0.005, 0.01,0.05,0.075)

for(model in c("lasso", "rf")){
  for(strat in c("max_div")){
    for(alphas_other in c(0)){
      for(g in c("all", "positive")){
        for(shuffle in c( F, T)){
          for(rep in 1:10){
            if(g == "all") gene_list <- genes
            else{
              if(model == "rf") gene_list <- positive_genes_rf
              else gene_list <- positive_genes_lasso
            }
            
            print(paste0(model, "-", strat, "-", alphas_other, "-", g, "-", 
                               ifelse(shuffle, "shuffled", "trueData"), "-", rep))
            
            if(model == "rf"){
              mat <- bRF_inference(counts=counts, genes= gene_list, tfs=tfs, 
                          alpha = alphas_per_genes[[paste0(model, "_", strat, "_", alphas_other)]], 
              tf_expression_permutation = shuffle, pwm_occurrence = pwm_occurrence,
              nTrees = 2000, importance = "%IncMSE", nCores = nCores)
              
              for(density in densities){
                networks[[paste0(model, "-", strat, "-", alphas_other, "-", g, "-", 
                               ifelse(shuffle, "shuffled", "trueData"), "-", rep, "-", density)]] <-
                bRF_network(mat, density, pwm_occurrence, gene_list, tfs)
              }
            }
            
            if(model == "lasso"){
              mat <- LASSO.D3S_inference(counts, gene_list, tfs,
                                   alpha = alphas_per_genes[[paste0(model, "_", strat, "_", alphas_other)]], 
                                   N = 100, tf_expression_permutation = shuffle,
                                   int_pwm_noise = 0, score = "pval",
                                   pwm_occurrence = pwm_occurrence,
                                   nCores = nCores)
              
              for(density in densities){
                networks[[paste0(model, "-", strat, "-", alphas_other, "-", g, "-", 
                               ifelse(shuffle, "shuffled", "trueData"), "-", rep, "-", density)]] <-
                LASSO.D3S_network(mat, density, pwm_occurrence, gene_list, tfs)
              }
            }
          }
        }
      }
    }
  }
}
save(networks, file = "results/gene_specific_alphas_grns_max_div_0others.rdata")
load("results/gene_specific_alphas_grns_max_div_0others.rdata")
nCores = 20


pos_nets <- networks[str_detect(names(networks), 'positive')]
all_nets <- networks[!str_detect(names(networks), 'positive')]
names(pos_nets)

pos_nets_lasso <- pos_nets[str_detect(names(pos_nets), 'lasso')]
pos_nets_rf <- pos_nets[str_detect(names(pos_nets), 'rf')]

all_nets_lasso <- all_nets[str_detect(names(all_nets), 'lasso')]
all_nets_rf <- all_nets[str_detect(names(all_nets), 'rf')]


val <- rbind.data.frame(evaluate_networks(pos_nets_lasso, nCores = nCores,
                  input_genes = positive_genes_lasso ),
                 evaluate_networks(pos_nets_rf, nCores = nCores,
                  input_genes = positive_genes_rf),
                 evaluate_networks(all_nets_lasso, nCores = nCores,
                  input_genes = genes),
                 evaluate_networks(all_nets_rf, nCores = nCores,
                  input_genes = genes ))

settings <- c("model", "alphaOpt", "alphaOtherGenes", "genesInNetwork", "dataset", "rep", "density")

val[, settings] <-
  str_split_fixed(val$network_name, '-', length(settings))


val %>%
ggplot(aes(x=dataset, y = precision, color = dataset)) + 
  ggh4x::facet_nested_wrap(vars(model, density, genesInNetwork), 
                           ncol = 8, nest_line = T) + 
  geom_point() + stat_compare_means(label = "p.signif")

val %>%
ggplot(aes(x=dataset, y = recall, color = dataset)) + 
  ggh4x::facet_nested_wrap(vars(model, density, genesInNetwork), scales = "free",
                           ncol = 8, nest_line = T) + 
  geom_point() + stat_compare_means(label = "p.signif")

save(val, file = "results/gene_specific_validation.rdata")

On RF model

hist(alphas_per_genes$rf_max_div_0[positive_genes_rf])

# load("results/100_permutations_bRF_validation.rdata")
# 
# load("results/100_permutations_lasso_validation.rdata")
# load("results/100_permutations_lasso_validation_cluster_pos.rdata")
load("results/gene_specific_validation.rdata")

load("results/100_permutations_bRF_validation_cluster_pos.rdata")
val_rf_pos <- val_conn_pos
load("results/100_permutations_bRF_validation.rdata")
val_rf_all <- val_conn


plot_positive <- val_rf_pos[(val_rf_pos$alpha == "0" | val_rf_pos$alpha == "1") & 
             as.numeric(val_rf_pos$rep) <=10 & val_rf_pos$density == 0.005 &
             val_rf_pos$method == "bRF",] %>%
  ggplot(aes(x=dataset, color = dataset, y = precision)) + 
  ggh4x::facet_nested_wrap(vars(alpha), 
                           ncol = 8, nest_line = T) +
  geom_boxplot(outlier.alpha = 0)+ ggtitle("Global alpha")+ 
  stat_compare_means(label = "p.signif")+
  ylim(c(0.2, 0.5)) + theme_pubr(legend = "none")+
  theme(strip.background = element_blank())+
  geom_hline(yintercept = 0.3807947)+
  val %>%
  filter(density == 0.005 & model == "rf" & genesInNetwork == "positive")%>%
  ggplot(aes(x=dataset, y = precision, color = dataset)) + 
  stat_compare_means(label = "p.signif") + 
  ylim(c(0.2, 0.5)) + stat_compare_means(label = "p.signif")+
  theme_pubr(legend = "none") +
  geom_hline(yintercept = 0.3807947)+
  geom_boxplot(outlier.alpha = 0) + ggtitle("Gene-specific alpha") +
  plot_annotation(title = "Precision of 0.005 density networks for bRF ", 
                  subtitle = "GRNs of positive genes only") +
  plot_layout(widths = c(2,1))


plot_all <- val_rf_all[(val_rf_all$alpha == "0") & 
             as.numeric(val_rf_all$rep) <=10 & val_rf_all$density == 0.005 &
             val_rf_all$method == "bRF",] %>%
  ggplot(aes(x=dataset, color = dataset, y = precision)) + 
  ggh4x::facet_nested_wrap(vars(alpha), 
                           ncol = 8, nest_line = T) +
  geom_boxplot(outlier.alpha = 0)+ ggtitle("Global alpha")+ 
  stat_compare_means(label = "p.signif")+
  geom_hline(yintercept = 0.3451811) + 
  ylim(c(0.2, 0.5)) + theme_pubr(legend = "none")+
  theme(strip.background = element_blank())+
  val %>%
  filter(density == 0.005 & model == "rf" & genesInNetwork == "all")%>%
  ggplot(aes(x=dataset, y = precision, color = dataset)) + 
  stat_compare_means(label = "p.signif") + 
  ylim(c(0.2, 0.5)) + stat_compare_means(label = "p.signif")+
  theme_pubr(legend = "none") +
  geom_hline(yintercept = 0.3451811) + 
  geom_boxplot(outlier.alpha = 0) + ggtitle("Gene-specific alpha") +
  plot_annotation(title = "Precision of 0.005 density networks for bRF ", 
                  subtitle = "GRNs of all genes") +
  plot_layout(widths = c(1,1))


plot_all

plot_positive

DAP only

# load("results/100_permutations_bRF_validation.rdata")
# 
# load("results/100_permutations_lasso_validation.rdata")
# load("results/100_permutations_lasso_validation_cluster_pos.rdata")
load("results/gene_specific_validation_dap.rdata")

load("results/100_permutations_bRF_validation_dap_cluster_pos.rdata")
val_rf_pos <- val_dap
load("results/100_permutations_bRF_validation_dap.rdata")
val_rf_all <- val_dap


plot_positive <- val_rf_pos[(val_rf_pos$alpha == "0" | val_rf_pos$alpha == "1") & 
             as.numeric(val_rf_pos$rep) <=10 & val_rf_pos$density == 0.005 &
             val_rf_pos$method == "bRF",] %>%
  ggplot(aes(x=dataset, color = dataset, y = precision)) + 
  ggh4x::facet_nested_wrap(vars(alpha), 
                           ncol = 8, nest_line = T) +
  geom_boxplot(outlier.alpha = 0)+ ggtitle("Global alpha")+ 
  stat_compare_means(label = "p.signif")+
  ylim(c(0.2, 0.5)) + theme_pubr(legend = "none")+
  theme(strip.background = element_blank())+
  geom_hline(yintercept = 0.3006536)+
  val %>%
  filter(density == 0.005 & model == "rf" & genesInNetwork == "positive")%>%
  ggplot(aes(x=dataset, y = precision, color = dataset)) + 
  stat_compare_means(label = "p.signif") + 
  ylim(c(0.2, 0.5)) + stat_compare_means(label = "p.signif")+
  theme_pubr(legend = "none") +
  geom_hline(yintercept = 0.3006536)+
  geom_boxplot(outlier.alpha = 0) + ggtitle("Gene-specific alpha") +
  plot_annotation(title = "Precision of 0.005 density networks for bRF ", 
                  subtitle = "GRNs of positive genes only") +
  plot_layout(widths = c(2,1))


plot_all <- val_rf_all[(val_rf_all$alpha == "0") & 
             as.numeric(val_rf_all$rep) <=10 & val_rf_all$density == 0.005 &
             val_rf_all$method == "bRF",] %>%
  ggplot(aes(x=dataset, color = dataset, y = precision)) + 
  ggh4x::facet_nested_wrap(vars(alpha), 
                           ncol = 8, nest_line = T) +
  geom_boxplot(outlier.alpha = 0)+ ggtitle("Global alpha")+ 
  stat_compare_means(label = "p.signif")+
  geom_hline(yintercept = 0.3373373) + 
  ylim(c(0.2, 0.5)) + theme_pubr(legend = "none")+
  theme(strip.background = element_blank())+
  val %>%
  filter(density == 0.005 & model == "rf" & genesInNetwork == "all")%>%
  ggplot(aes(x=dataset, y = precision, color = dataset)) + 
  stat_compare_means(label = "p.signif") + 
  ylim(c(0.2, 0.5)) + stat_compare_means(label = "p.signif")+
  theme_pubr(legend = "none") +
  geom_hline(yintercept = 0.3373373) + 
  geom_boxplot(outlier.alpha = 0) + ggtitle("Gene-specific alpha") +
  plot_annotation(title = "Precision of 0.005 density networks for bRF ", 
                  subtitle = "GRNs of all genes") +
  plot_layout(widths = c(1,1))


plot_all

plot_positive

On LASSO model

hist(alphas_per_genes$lasso_max_div_0[positive_genes_lasso])

# load("results/100_permutations_bRF_validation.rdata")
# 
# load("results/100_permutations_lasso_validation.rdata")
# load("results/100_permutations_lasso_validation_cluster_pos.rdata")
load("results/gene_specific_validation.rdata")

load("results/100_permutations_lasso_validation_cluster_pos.rdata")
val_lasso_pos <- val_conn_pos
load("results/100_permutations_lasso_validation.rdata")
val_lasso_all <- val_conn


plot_positive <- val_lasso_pos[(val_lasso_pos$alpha == "0" | val_lasso_pos$alpha == "1") & 
             as.numeric(val_lasso_pos$rep) <=10 & val_lasso_pos$density == 0.005 &
             val_lasso_pos$method == "LASSO",] %>%
  ggplot(aes(x=dataset, color = dataset, y = precision)) + 
  ggh4x::facet_nested_wrap(vars(alpha), 
                           ncol = 8, nest_line = T) +
  geom_boxplot(outlier.alpha = 0)+ ggtitle("Global alpha")+ 
  stat_compare_means(label = "p.signif")+
  ylim(c(0.2, 0.5)) + theme_pubr(legend = "none")+
  theme(strip.background = element_blank())+
  geom_hline(yintercept = 0.2921348)+
  val %>%
  filter(density == 0.005 & model == "lasso" & genesInNetwork == "positive")%>%
  ggplot(aes(x=dataset, y = precision, color = dataset)) + 
  stat_compare_means(label = "p.signif") + 
  ylim(c(0.2, 0.5)) + stat_compare_means(label = "p.signif")+
  theme_pubr(legend = "none") +
  geom_hline(yintercept = 0.2921348)+
  geom_boxplot(outlier.alpha = 0) + ggtitle("Gene-specific alpha") +
  plot_annotation(title = "Precision of 0.005 density networks for LASSO ", 
                  subtitle = "GRNs of positive genes only") +
  plot_layout(widths = c(2,1))


plot_all <- val_lasso_all[(val_lasso_all$alpha == "0") & 
             as.numeric(val_lasso_all$rep) <=10 & val_lasso_all$density == 0.005 &
             val_lasso_all$method == "LASSO",] %>%
  ggplot(aes(x=dataset, color = dataset, y = precision)) + 
  ggh4x::facet_nested_wrap(vars(alpha), 
                           ncol = 8, nest_line = T) +
  geom_boxplot(outlier.alpha = 0)+ ggtitle("Global alpha")+ 
  stat_compare_means(label = "p.signif")+
  geom_hline(yintercept = 0.3451811) + 
  ylim(c(0.2, 0.5)) + theme_pubr(legend = "none")+
  theme(strip.background = element_blank())+
  val %>%
  filter(density == 0.005 & model == "lasso" & genesInNetwork == "all")%>%
  ggplot(aes(x=dataset, y = precision, color = dataset)) + 
  stat_compare_means(label = "p.signif") + 
  ylim(c(0.2, 0.5)) + stat_compare_means(label = "p.signif")+
  theme_pubr(legend = "none") +
  geom_hline(yintercept = 0.3451811) + 
  geom_boxplot(outlier.alpha = 0) + ggtitle("Gene-specific alpha") +
  plot_annotation(title = "Precision of 0.005 density networks for LASSO ", 
                  subtitle = "GRNs of all genes") +
  plot_layout(widths = c(1,1))


plot_all

plot_positive

DAP only

# load("results/100_permutations_bRF_validation.rdata")
# 
# load("results/100_permutations_lasso_validation.rdata")
# load("results/100_permutations_lasso_validation_cluster_pos.rdata")
load("results/gene_specific_validation_dap.rdata")

load("results/100_permutations_lasso_validation_dap_cluster_pos.rdata")
val_rf_pos <- val_dap
load("results/100_permutations_lasso_validation_dap.rdata")
val_rf_all <- val_dap


plot_positive <- val_rf_pos[(val_rf_pos$alpha == "0" | val_rf_pos$alpha == "1") & 
             as.numeric(val_rf_pos$rep) <=10 & val_rf_pos$density == 0.005 &
             val_rf_pos$method == "LASSO",] %>%
  ggplot(aes(x=dataset, color = dataset, y = precision)) + 
  ggh4x::facet_nested_wrap(vars(alpha), 
                           ncol = 8, nest_line = T) +
  geom_boxplot(outlier.alpha = 0)+ ggtitle("Global alpha")+ 
  stat_compare_means(label = "p.signif")+
  ylim(c(0.2, 0.5)) + theme_pubr(legend = "none")+
  theme(strip.background = element_blank())+
  geom_hline(yintercept = 0.2653061)+
  val %>%
  filter(density == 0.005 & model == "lasso" & genesInNetwork == "positive")%>%
  ggplot(aes(x=dataset, y = precision, color = dataset)) + 
  stat_compare_means(label = "p.signif") + 
  ylim(c(0.2, 0.5)) + stat_compare_means(label = "p.signif")+
  theme_pubr(legend = "none") +
  geom_hline(yintercept = 0.2653061)+
  geom_boxplot(outlier.alpha = 0) + ggtitle("Gene-specific alpha") +
  plot_annotation(title = "Precision of 0.005 density networks for lasso ", 
                  subtitle = "GRNs of positive genes only") +
  plot_layout(widths = c(2,1))


plot_all <- val_rf_all[(val_rf_all$alpha == "0") & 
             as.numeric(val_rf_all$rep) <=10 & val_rf_all$density == 0.005 &
             val_rf_all$method == "LASSO",] %>%
  ggplot(aes(x=dataset, color = dataset, y = precision)) + 
  ggh4x::facet_nested_wrap(vars(alpha), 
                           ncol = 8, nest_line = T) +
  geom_boxplot(outlier.alpha = 0)+ ggtitle("Global alpha")+ 
  stat_compare_means(label = "p.signif")+
  geom_hline(yintercept = 0.3373373) + 
  ylim(c(0.2, 0.5)) + theme_pubr(legend = "none")+
  theme(strip.background = element_blank())+
  val %>%
  filter(density == 0.005 & model == "rf" & genesInNetwork == "all")%>%
  ggplot(aes(x=dataset, y = precision, color = dataset)) + 
  stat_compare_means(label = "p.signif") + 
  ylim(c(0.2, 0.5)) + stat_compare_means(label = "p.signif")+
  theme_pubr(legend = "none") +
  geom_hline(yintercept = 0.3373373) + 
  geom_boxplot(outlier.alpha = 0) + ggtitle("Gene-specific alpha") +
  plot_annotation(title = "Precision of 0.005 density networks for lasso ", 
                  subtitle = "GRNs of all genes") +
  plot_layout(widths = c(1,1))


plot_all

plot_positive

recall

# load("results/100_permutations_bRF_validation.rdata")
# 
# load("results/100_permutations_lasso_validation.rdata")
# load("results/100_permutations_lasso_validation_cluster_pos.rdata")
load("results/gene_specific_validation.rdata")

load("results/100_permutations_bRF_validation_cluster_pos.rdata")
val_rf_pos <- val_conn_pos
load("results/100_permutations_bRF_validation.rdata")
val_rf_all <- val_conn


plot_positive <- val_rf_pos[(val_rf_pos$alpha == "0" | val_rf_pos$alpha == "1") & 
             as.numeric(val_rf_pos$rep) <=10 & val_rf_pos$density == 0.005 &
             val_rf_pos$method == "bRF",] %>%
  ggplot(aes(x=dataset, color = dataset, y = recall)) + 
  ggh4x::facet_nested_wrap(vars(alpha), 
                           ncol = 8, nest_line = T) +
  geom_boxplot(outlier.alpha = 0)+ ggtitle("Global alpha")+ 
  stat_compare_means(label = "p.signif")+ ylim(c(0,0.03))+
  theme_pubr(legend = "none")+
  theme(strip.background = element_blank())+
  val %>%
  filter(density == 0.005 & model == "rf" & genesInNetwork == "positive")%>%
  ggplot(aes(x=dataset, y = recall, color = dataset)) + 
  stat_compare_means(label = "p.signif") + 
  stat_compare_means(label = "p.signif")+
  theme_pubr(legend = "none") + ylim(c(0,0.03))+
  geom_boxplot(outlier.alpha = 0) + ggtitle("Gene-specific alpha") +
  plot_annotation(title = "Recall of 0.005 density networks for bRF ", 
                  subtitle = "GRNs of positive genes only") +
  plot_layout(widths = c(2,1))


plot_all <- val_rf_all[(val_rf_all$alpha == "0") & 
             as.numeric(val_rf_all$rep) <=10 & val_rf_all$density == 0.005 &
             val_rf_all$method == "bRF",] %>%
  ggplot(aes(x=dataset, color = dataset, y = recall)) + 
  ggh4x::facet_nested_wrap(vars(alpha), 
                           ncol = 8, nest_line = T) +
  geom_boxplot(outlier.alpha = 0)+ ggtitle("Global alpha")+ 
  stat_compare_means(label = "p.signif")+
  theme_pubr(legend = "none")+ ylim(c(0,0.03))+
  theme(strip.background = element_blank())+
  val %>%
  filter(density == 0.005 & model == "rf" & genesInNetwork == "all")%>%
  ggplot(aes(x=dataset, y = recall, color = dataset)) + 
  stat_compare_means(label = "p.signif") + 
  stat_compare_means(label = "p.signif")+
  theme_pubr(legend = "none") +
  geom_boxplot(outlier.alpha = 0) + ggtitle("Gene-specific alpha") +
  plot_annotation(title = "Recall of 0.005 density networks for bRF ", 
                  subtitle = "GRNs of all genes") +
  plot_layout(widths = c(1,1)) + ylim(c(0,0.03))


plot_all
plot_positive